# -*- coding:utf-8 -*-

import json
from bs4 import BeautifulSoup
import requests

# English gloss of the bare string below: "Get the names of all colleges and
# departments of Wuhan University."
'''
获取武汉大学所有院系名称
'''
# Module-level list; the visible code never reads or modifies this object.
name_link = []
college_name = []          # holds only the names of all Wuhan University colleges
college_nameandlink = []              # holds one {'CollegeName': ..., 'CollegeLink': ...} dict per college listed on the university page

def college_info(timeout=10):
	"""Download the Wuhan University colleges/departments listing page.

	Args:
		timeout: Seconds to wait for the server before giving up. It is
			passed straight to ``requests.get``; without a timeout a stalled
			connection would block the script indefinitely.

	Returns:
		The raw, undecoded response body (``Response.content``).

	Raises:
		requests.exceptions.RequestException: if the connection fails or the
			timeout expires.
	"""
	url = 'http://www.whu.edu.cn/jgsz/yxsz.htm'
	response = requests.get(url, timeout=timeout)
	return response.content

def parse_college_name(html):
	"""Extract college names and links from the listing page markup.

	Every ``<li class="infos">`` element contributes its stripped text as the
	college name and the ``href`` of its first ``<a>`` as the link. Results
	are appended to the module-level ``college_name`` and
	``college_nameandlink`` lists, so repeated calls accumulate entries.
	The name/link pairs are also printed as indented JSON.

	Args:
		html: Markup of the listing page, e.g. the value returned by
			``college_info``.

	Returns:
		The module-level ``college_name`` list. Return
		``college_nameandlink`` instead if the name/link pairs are needed.
	"""
	# Name the parser explicitly so the result does not depend on which
	# optional parsers happen to be installed (and bs4 does not warn).
	soup = BeautifulSoup(html, 'html.parser')
	for item in soup.find_all('li', class_='infos'):
		name = item.get_text().strip()
		# An entry without an anchor makes ``item.a`` None; record a null
		# link rather than failing with AttributeError.
		anchor = item.a
		link = anchor.get('href') if anchor is not None else None
		college_name.append(name)
		college_nameandlink.append({'CollegeName': name, 'CollegeLink': link})

	# The parenthesised form is valid under both Python 2 and Python 3.
	print(json.dumps(college_nameandlink, indent=4, ensure_ascii=False))
	return college_name

if __name__ == '__main__':
	# Run as a script: fetch the listing page, then parse it (parsing also
	# prints the collected name/link pairs).
	parse_college_name(college_info())
